In [1]:
# importing required libraries
import numpy as np
import pandas as pd
import plotly
In [2]:
# importing csv files as pandas dataframes
races = pd.read_csv("F1_data/races.csv")
circuits = pd.read_csv("F1_data/circuits.csv")
display(races)
display(circuits)
raceId year round circuitId name date time url fp1_date fp1_time fp2_date fp2_time fp3_date fp3_time quali_date quali_time sprint_date sprint_time
0 1 2009 1 1 Australian Grand Prix 2009-03-29 06:00:00 http://en.wikipedia.org/wiki/2009_Australian_G... \N \N \N \N \N \N \N \N \N \N
1 2 2009 2 2 Malaysian Grand Prix 2009-04-05 09:00:00 http://en.wikipedia.org/wiki/2009_Malaysian_Gr... \N \N \N \N \N \N \N \N \N \N
2 3 2009 3 17 Chinese Grand Prix 2009-04-19 07:00:00 http://en.wikipedia.org/wiki/2009_Chinese_Gran... \N \N \N \N \N \N \N \N \N \N
3 4 2009 4 3 Bahrain Grand Prix 2009-04-26 12:00:00 http://en.wikipedia.org/wiki/2009_Bahrain_Gran... \N \N \N \N \N \N \N \N \N \N
4 5 2009 5 4 Spanish Grand Prix 2009-05-10 12:00:00 http://en.wikipedia.org/wiki/2009_Spanish_Gran... \N \N \N \N \N \N \N \N \N \N
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1074 1092 2022 18 22 Japanese Grand Prix 2022-10-09 05:00:00 http://en.wikipedia.org/wiki/2022_Japanese_Gra... 2022-10-07 04:00:00 2022-10-07 08:00:00 2022-10-08 04:00:00 2022-10-08 07:00:00 \N \N
1075 1093 2022 19 69 United States Grand Prix 2022-10-23 19:00:00 http://en.wikipedia.org/wiki/2022_United_State... 2022-10-21 19:00:00 2022-10-21 22:00:00 2022-10-22 19:00:00 2022-10-22 22:00:00 \N \N
1076 1094 2022 20 32 Mexico City Grand Prix 2022-10-30 20:00:00 http://en.wikipedia.org/wiki/2022_Mexican_Gran... 2022-10-28 18:00:00 2022-10-28 21:00:00 2022-10-29 17:00:00 2022-10-29 20:00:00 \N \N
1077 1095 2022 21 18 Brazilian Grand Prix 2022-11-13 18:00:00 http://en.wikipedia.org/wiki/2022_Brazilian_Gr... 2022-11-11 15:30:00 2022-11-12 15:30:00 \N \N 2022-11-11 19:00:00 2022-11-12 19:30:00
1078 1096 2022 22 24 Abu Dhabi Grand Prix 2022-11-20 13:00:00 http://en.wikipedia.org/wiki/2022_Abu_Dhabi_Gr... 2022-11-18 09:00:00 2022-11-18 12:00:00 2022-11-19 10:00:00 2022-11-19 13:00:00 \N \N

1079 rows × 18 columns

circuitId circuitRef name location country lat lng alt url
0 1 albert_park Albert Park Grand Prix Circuit Melbourne Australia -37.84970 144.96800 10 http://en.wikipedia.org/wiki/Melbourne_Grand_P...
1 2 sepang Sepang International Circuit Kuala Lumpur Malaysia 2.76083 101.73800 18 http://en.wikipedia.org/wiki/Sepang_Internatio...
2 3 bahrain Bahrain International Circuit Sakhir Bahrain 26.03250 50.51060 7 http://en.wikipedia.org/wiki/Bahrain_Internati...
3 4 catalunya Circuit de Barcelona-Catalunya Montmeló Spain 41.57000 2.26111 109 http://en.wikipedia.org/wiki/Circuit_de_Barcel...
4 5 istanbul Istanbul Park Istanbul Turkey 40.95170 29.40500 130 http://en.wikipedia.org/wiki/Istanbul_Park
... ... ... ... ... ... ... ... ... ...
71 75 portimao Autódromo Internacional do Algarve Portimão Portugal 37.22700 -8.62670 108 http://en.wikipedia.org/wiki/Algarve_Internati...
72 76 mugello Autodromo Internazionale del Mugello Mugello Italy 43.99750 11.37190 255 http://en.wikipedia.org/wiki/Mugello_Circuit
73 77 jeddah Jeddah Corniche Circuit Jeddah Saudi Arabia 21.63190 39.10440 15 http://en.wikipedia.org/wiki/Jeddah_Street_Cir...
74 78 losail Losail International Circuit Al Daayen Qatar 25.49000 51.45420 \N http://en.wikipedia.org/wiki/Losail_Internatio...
75 79 miami Miami International Autodrome Miami USA 25.95810 -80.23890 \N http://en.wikipedia.org/wiki/Miami_Internation...

76 rows × 9 columns

Question 2¶

Which country hosted most races?¶

In [3]:
# creating subsets of columns with the required columns
races_subset = races[['circuitId', 'name','year','date']]
circuits_subset = circuits[['circuitId', 'name','location','country']]
display(races_subset)
display(circuits_subset)
circuitId name year date
0 1 Australian Grand Prix 2009 2009-03-29
1 2 Malaysian Grand Prix 2009 2009-04-05
2 17 Chinese Grand Prix 2009 2009-04-19
3 3 Bahrain Grand Prix 2009 2009-04-26
4 4 Spanish Grand Prix 2009 2009-05-10
... ... ... ... ...
1074 22 Japanese Grand Prix 2022 2022-10-09
1075 69 United States Grand Prix 2022 2022-10-23
1076 32 Mexico City Grand Prix 2022 2022-10-30
1077 18 Brazilian Grand Prix 2022 2022-11-13
1078 24 Abu Dhabi Grand Prix 2022 2022-11-20

1079 rows × 4 columns

circuitId name location country
0 1 Albert Park Grand Prix Circuit Melbourne Australia
1 2 Sepang International Circuit Kuala Lumpur Malaysia
2 3 Bahrain International Circuit Sakhir Bahrain
3 4 Circuit de Barcelona-Catalunya Montmeló Spain
4 5 Istanbul Park Istanbul Turkey
... ... ... ... ...
71 75 Autódromo Internacional do Algarve Portimão Portugal
72 76 Autodromo Internazionale del Mugello Mugello Italy
73 77 Jeddah Corniche Circuit Jeddah Saudi Arabia
74 78 Losail International Circuit Al Daayen Qatar
75 79 Miami International Autodrome Miami USA

76 rows × 4 columns

In [4]:
# renaming column names for better understanding
circuits_subset_renamed = circuits_subset.rename(columns={'name':'Name of the stadium'})
races_subset_renamed = races_subset.rename(columns={'name':'Race Name'})
In [5]:
# joining the 2 datasets to create a final database for analysis
race_circuits_combined=pd.merge(races_subset_renamed,circuits_subset_renamed,on='circuitId',how='left')
display(race_circuits_combined)
circuitId Race Name year date Name of the stadium location country
0 1 Australian Grand Prix 2009 2009-03-29 Albert Park Grand Prix Circuit Melbourne Australia
1 2 Malaysian Grand Prix 2009 2009-04-05 Sepang International Circuit Kuala Lumpur Malaysia
2 17 Chinese Grand Prix 2009 2009-04-19 Shanghai International Circuit Shanghai China
3 3 Bahrain Grand Prix 2009 2009-04-26 Bahrain International Circuit Sakhir Bahrain
4 4 Spanish Grand Prix 2009 2009-05-10 Circuit de Barcelona-Catalunya Montmeló Spain
... ... ... ... ... ... ... ...
1074 22 Japanese Grand Prix 2022 2022-10-09 Suzuka Circuit Suzuka Japan
1075 69 United States Grand Prix 2022 2022-10-23 Circuit of the Americas Austin USA
1076 32 Mexico City Grand Prix 2022 2022-10-30 Autódromo Hermanos Rodríguez Mexico City Mexico
1077 18 Brazilian Grand Prix 2022 2022-11-13 Autódromo José Carlos Pace São Paulo Brazil
1078 24 Abu Dhabi Grand Prix 2022 2022-11-20 Yas Marina Circuit Abu Dhabi UAE

1079 rows × 7 columns

In [6]:
# rearranging columns for better visibility
Final_dataset = race_circuits_combined[['circuitId', 'Race Name', 'Name of the stadium', 'country', 'location','date','year']]
Final_dataset.head()
Out[6]:
circuitId Race Name Name of the stadium country location date year
0 1 Australian Grand Prix Albert Park Grand Prix Circuit Australia Melbourne 2009-03-29 2009
1 2 Malaysian Grand Prix Sepang International Circuit Malaysia Kuala Lumpur 2009-04-05 2009
2 17 Chinese Grand Prix Shanghai International Circuit China Shanghai 2009-04-19 2009
3 3 Bahrain Grand Prix Bahrain International Circuit Bahrain Sakhir 2009-04-26 2009
4 4 Spanish Grand Prix Circuit de Barcelona-Catalunya Spain Montmeló 2009-05-10 2009
In [7]:
# calculating summary 1 for the number of races across countries and various racetracks
summary1 = pd.DataFrame(Final_dataset.groupby(['country','circuitId'], as_index = False)['Race Name'].count())
summary1 = summary1.rename(columns={'Race Name':'No_of_races'})
display(summary1)
country circuitId No_of_races
0 Argentina 25 20
1 Australia 1 25
2 Australia 29 11
3 Austria 57 1
4 Austria 70 36
... ... ... ...
71 USA 46 20
72 USA 60 1
73 USA 63 1
74 USA 69 10
75 USA 79 1

76 rows × 3 columns

In [8]:
# creating the final summary for the total number of races across all countries
summary2 = pd.DataFrame(summary1.groupby(['country'], as_index = False)['No_of_races'].sum())
display(summary2)
country No_of_races
0 Argentina 20
1 Australia 36
2 Austria 37
3 Azerbaijan 6
4 Bahrain 19
5 Belgium 67
6 Brazil 49
7 Canada 51
8 China 16
9 France 63
10 Germany 79
11 Hungary 37
12 India 3
13 Italy 104
14 Japan 38
15 Korea 4
16 Malaysia 19
17 Mexico 22
18 Monaco 68
19 Morocco 1
20 Netherlands 32
21 Portugal 18
22 Qatar 1
23 Russia 8
24 Saudi Arabia 2
25 Singapore 13
26 South Africa 23
27 Spain 59
28 Sweden 6
29 Switzerland 5
30 Turkey 9
31 UAE 14
32 UK 77
33 USA 73
In [9]:
# plotting the graph for summary 2

import plotly.express as px
fig = px.bar(summary2, x="country", y="No_of_races",
             color='country',facet_row_spacing = 1,facet_col_spacing=1,title='Distriution of races hosted by countries across all seasons')
fig.update_traces(width=1)
fig.update_layout(xaxis={'categoryorder':'total ascending'})
fig.update_xaxes(tickangle= 50) 
fig.show()
In [ ]: